package main

import (
	"flag"
	"gark-crawlers/config"
	"gark-crawlers/engine"
	"gark-crawlers/parser/zhenai"
	"gark-crawlers/persitent/client"
	"gark-crawlers/rpcsupport"
	"gark-crawlers/scheduler"
	client2 "gark-crawlers/worker/client"
	"log"
	"net/rpc"
	"strings"
)

// Command-line flags. Both are dereferenced in main after flag.Parse.
var (
	// itemSaverHost is the value main passes to client.ItemSaver.
	// Defaults to the empty string when the flag is not supplied.
	itemSaverHost = flag.String("itemsaver_host", "", "itemsaver host")
	// workerHosts is a comma-separated list; main splits it on "," and hands
	// the pieces to createProcessorPool, which connects to each one.
	// Defaults to the empty string when the flag is not supplied.
	workerHosts   = flag.String("worker_hosts", "", "worker hosts(comma separated)")
)

// main parses the command-line flags, obtains the item channel from
// client.ItemSaver, builds a pool of worker RPC clients from -worker_hosts,
// and runs a ConcurrentEngine seeded with the zhenai city-list page.
//
// It panics if client.ItemSaver returns an error.
func main() {
	flag.Parse()

	saverChan, saverErr := client.ItemSaver(*itemSaverHost)
	if saverErr != nil {
		panic(saverErr)
	}

	// Alternative: run the crawler in-process (concurrent, non-distributed).
	//e := engine.ConcurrentEngine{
	//	Scheduler:   &scheduler.QueuedScheduler{},
	//	WorkerCount: 10,
	//	ItemChan:    itemChan,
	//	RequestProcessor: engine.Worker,
	//}

	// Distributed setup: every request is processed through an RPC client
	// drawn from the pool built out of the -worker_hosts entries.
	hosts := strings.Split(*workerHosts, ",")
	rpcClients := createProcessorPool(hosts)

	crawler := engine.ConcurrentEngine{
		Scheduler:        &scheduler.QueuedScheduler{},
		WorkerCount:      10,
		ItemChan:         saverChan,
		RequestProcessor: client2.CreateProcessor(rpcClients),
	}

	// Seed request: the city-list page, parsed by zhenai.ParseCityList.
	seed := engine.Request{
		Url:    "http://www.zhenai.com/zhenghun",
		Parser: engine.NewFuncParser(zhenai.ParseCityList, config.ParseCityList),
	}
	crawler.Run(seed)

	// Alternative seed kept for reference: start from a single city page.
	//e.Run(engine.Request{
	//	Url:        "http://www.zhenai.com/zhenghun/shanghai",
	//	ParserFunc: zhenai.ParseCity,
	//})
}

// createProcessorPool connects to every worker address in hosts and returns a
// channel that hands out the connected RPC clients in round-robin order.
//
// Entries are trimmed of surrounding whitespace, and blank entries are
// skipped rather than dialed. Blank entries arise naturally: strings.Split
// on an empty -worker_hosts value yields a single "" element, and a trailing
// or doubled comma yields empty strings as well. Hosts that fail to connect
// are logged and left out of the pool.
//
// The returned channel is unbuffered and is never closed. If no host could be
// connected, nothing is ever sent on it and receivers block; in that case no
// feeder goroutine is started, so the process does not spin on an empty
// client list.
func createProcessorPool(hosts []string) chan *rpc.Client {
	clients := make([]*rpc.Client, 0, len(hosts))
	for _, h := range hosts {
		h = strings.TrimSpace(h)
		if h == "" {
			continue
		}

		c, err := rpcsupport.NewClient(h)
		if err != nil {
			log.Printf("err connecting host:%s failed; error: %v", h, err)
			continue
		}
		clients = append(clients, c)
		log.Printf("success connecting host:%s;", h)
	}

	out := make(chan *rpc.Client)
	if len(clients) == 0 {
		// With nothing to hand out, the feeder loop below would iterate over
		// an empty slice forever and burn a CPU core without ever sending.
		log.Printf("no worker hosts connected; processor pool is empty")
		return out
	}

	// Feed the connected clients to consumers forever, cycling through them.
	go func() {
		for {
			for _, c := range clients {
				out <- c
			}
		}
	}()
	return out
}
